import scrapy
from icecream import icecream
from scrapy import cmdline, Selector
from xl_spider import items  # 要将爬虫项目xl_spider标记为源根目录sources root
from scrapy.http import HtmlResponse


class PicPicSpider(scrapy.Spider):
    """Crawl the 4K anime wallpaper listing on pic.netbian.com.

    Yields one ``XlSpiderItem`` per thumbnail (title + absolute image URL)
    on each listing page, then follows the "next page" link until the
    listing runs out.
    """

    name = 'pic_Pic'
    # allowed_domains = ['pic.netbian.com']
    start_urls = ['https://pic.netbian.com/4kdongman/']
    # Site root, prepended to the relative hrefs/srcs found in the markup.
    host_url = 'https://pic.netbian.com'

    def parse(self, response):
        """Parse one listing page.

        Args:
            response: the listing page (``HtmlResponse``).

        Yields:
            ``XlSpiderItem`` for every ``<li>`` carrying a thumbnail, then a
            ``scrapy.Request`` for the next page when a "next" link exists.
        """
        sel = Selector(text=response.text)
        for li in sel.xpath('//div[@class="slist"]/ul/li'):
            src = li.xpath('./a/img/@src').extract_first()
            if not src:
                # Ad/placeholder <li> rows have no <a><img>; extract_first()
                # returns None there, and `host_url + None` would raise
                # TypeError — skip them instead of aborting the page.
                continue
            item = items.XlSpiderItem()
            item['title'] = li.xpath('./a/b/text()').extract_first()
            item['image'] = self.host_url + src
            yield item
        # "下一页" means "next page"; named next_href so the `next` builtin
        # is not shadowed.
        next_href = sel.xpath(
            '//div[@class="page"]/a[contains(text(),"下一页")]/@href'
        ).extract_first()
        if next_href:
            yield scrapy.Request(self.host_url + next_href, callback=self.parse)




if __name__ == '__main__':
    # Launch this spider directly, equivalent to `scrapy crawl pic_Pic`.
    cmdline.execute(['scrapy', 'crawl', 'pic_Pic'])
